'''
@Description: Scrapy spider for the SCU CS notice list (cs.scu.edu.cn)
@Author: dalong
@Date: xx-xxxx-xx
'''
# -*- coding: utf-8 -*-
import scrapy
import json



class BaiduComSpider(scrapy.Spider):
    """Spider for the SCU Computer Science notice board (xytz).

    Crawls the notice index page, follows every list entry to its detail
    page, and yields one item per article containing ``date``, ``url``
    and ``title``.
    """

    # NOTE(review): the class name says "Baidu" but the spider actually
    # targets cs.scu.edu.cn; kept as-is since callers may import this name.
    name = 'GgglxySpider'
    allowed_domains = ['cs.scu.edu.cn']
    start_urls = ['https://cs.scu.edu.cn/index/xytz.htm']

    def parse(self, response):
        """Parse the notice list page and schedule each detail page.

        Yields one ``scrapy.Request`` (callback ``parse_dir_contents``)
        per list entry that carries a link.
        """
        for li in response.xpath("//ul[@class='zy_listul']/li"):
            href = li.xpath("h3/a/@href").extract_first()
            if not href:
                # Skip malformed <li> entries: passing None to urljoin
                # would raise a TypeError instead of being ignored.
                continue
            yield scrapy.Request(
                response.urljoin(href), callback=self.parse_dir_contents)

    def parse_dir_contents(self, response):
        """Extract date, url and title from one notice detail page.

        Yields a single ``GgglxySpiderItem`` populated from the page's
        title block.
        """
        # Function-scope import: the original referenced
        # GgglxySpider.items.GgglxySpiderItem without importing the module,
        # which raised NameError on every detail page. Assumes the project
        # package is GgglxySpider/ with an items.py — TODO confirm layout.
        from GgglxySpider.items import GgglxySpiderItem

        item = GgglxySpiderItem()
        item['date'] = response.xpath(
            "//div[@class='detail_zy_title']/p/text()").extract_first()
        item['url'] = response.url
        item['title'] = response.xpath(
            "//div[@class='detail_zy_title']/h1/text()").extract_first()
        yield item
